# Mount Google Drive and import libraries
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# File paths
industrial_file = '/content/drive/MyDrive/ANNUAL_GAMMA_EXPECTATIONS_INDUSTRIAL_2005_2023.csv'
technology_file = '/content/drive/MyDrive/ANNUAL_GAMMA_EXPECTATIONS_TECHNOLOGY_2005_2023.csv'

# Read files
df_industrial = pd.read_csv(industrial_file)
df_technology = pd.read_csv(technology_file)

# Extract z5 data for industrial sector
z5_industrial = df_industrial[df_industrial['Variable'] == 'z5'].copy()
z5_industrial = z5_industrial[['Year', 'Gamma_Expectation']].rename(
    columns={'Gamma_Expectation': 'z5_industrial'}
)

# Extract z5 data for technology sector
z5_technology = df_technology[df_technology['Variable'] == 'z5'].copy()
z5_technology = z5_technology[['Year', 'Gamma_Expectation']].rename(
    columns={'Gamma_Expectation': 'z5_technology'}
)

# Merge data
df_z5 = pd.merge(z5_industrial, z5_technology, on='Year', how='inner')

# Vérification et conversion si nécessaire (pourcentage -> ratio)
convert_to_ratio = df_z5['z5_industrial'].max() > 1
if convert_to_ratio:
    df_z5['z5_industrial'] = df_z5['z5_industrial'] / 100
    df_z5['z5_technology'] = df_z5['z5_technology'] / 100
    adjustment = 0.1056 / 100
    print("Note: Values converted from percentage to ratio (divided by 100)")
    print(f"Adjustment applied: {adjustment:.6f} (0.1056% in ratio)")
else:
    adjustment = 0.1056
    print(f"Adjustment applied: {adjustment:.4f}")

# Calculate adjusted EBIT margin for TECH avec ajustement de 0.1056
df_z5['ebit_margin_adjusted_TECH'] = df_z5['z5_technology'] - adjustment

# Sélectionner les périodes
df_full = df_z5.copy()  # 2005-2023
df_2012 = df_z5[df_z5['Year'] >= 2012].copy()  # 2012-2023

print(f"\nAjustement utilisé: {adjustment:.6f}")
print(f"Note: EBIT Margin Adjusted TECH = z5_tech - {adjustment:.6f}")

# ============================================================================
# 1. CREATE STATISTICS TABLE (LEGEND) AS TEXT FILE
# ============================================================================
# Calculer les statistiques pour chaque période
def calculate_stats(df, period_name):
    corr, p_value = stats.pearsonr(df['ebit_margin_adjusted_TECH'], df['z5_industrial'])
    stats_dict = {
        'Period': period_name,
        'Years': f"{df['Year'].min()}-{df['Year'].max()}",
        'TECH_mean': df['ebit_margin_adjusted_TECH'].mean(),
        'TECH_std': df['ebit_margin_adjusted_TECH'].std(),
        'IND_mean': df['z5_industrial'].mean(),
        'IND_std': df['z5_industrial'].std(),
        'Correlation': corr,
        'p_value': p_value,
        'Mean_difference': np.abs(df['ebit_margin_adjusted_TECH'] - df['z5_industrial']).mean()
    }
    return stats_dict

stats_full = calculate_stats(df_full, "Full (2005-2023)")
stats_2012 = calculate_stats(df_2012, "Recent (2012-2023)")

# Tableau statistique
stats_text = f"""COMPARATIVE STATISTICS: EBIT MARGIN ANALYSIS
ADJUSTMENT: 0.1056{'%' if convert_to_ratio else ''}

PERIOD 2005-2023:
• Adjusted TECH EBIT Margin: Mean = {stats_full['TECH_mean']:.4f}, Std = {stats_full['TECH_std']:.4f}
• Industrial EBIT Margin: Mean = {stats_full['IND_mean']:.4f}, Std = {stats_full['IND_std']:.4f}
• Correlation TECH/IND: {stats_full['Correlation']:.4f} (p = {stats_full['p_value']:.4f})

PERIOD 2012-2023:
• Adjusted TECH EBIT Margin: Mean = {stats_2012['TECH_mean']:.4f}, Std = {stats_2012['TECH_std']:.4f}
• Industrial EBIT Margin: Mean = {stats_2012['IND_mean']:.4f}, Std = {stats_2012['IND_std']:.4f}
• Correlation TECH/IND: {stats_2012['Correlation']:.4f} (p = {stats_2012['p_value']:.4f})

NOTES:
• Adjusted TECH EBIT Margin = z5_tech - 0.1056{'%' if convert_to_ratio else ''}
• Industrial EBIT Margin = z5_industrial (EBIT/Revenue)
• All values expressed in {'ratio' if convert_to_ratio else 'percentage'}
• Shaded areas represent ±1 standard deviation from the mean
• Dotted lines represent period means
"""

# Save statistics table as text file
stats_txt_path = '/content/drive/MyDrive/EBIT_margin_statistics_table_adj_0_1056.txt'
with open(stats_txt_path, 'w') as f:
    f.write(stats_text)
print(f"Statistics table saved as text file: {stats_txt_path}")

# Display statistics table
print("\n" + "="*80)
print("STATISTICS TABLE")
print("="*80)
print(stats_text)

# ============================================================================
# 2. CREATE GRAPHS WITH LEGENDS LOWERED - AJUSTEMENT 0.1056
# ============================================================================
# Create a more compact figure
fig_graphs = plt.figure(figsize=(16, 7))

# Main title for graphs - placed higher to be visible
fig_graphs.suptitle(f'EBIT Margin: TECH (Adjusted -0.1056{"%" if convert_to_ratio else ""}) vs Industrial', 
                    fontsize=12, fontweight='bold', y=0.98)

# Create subplots
ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2)

# Adjust subplot parameters - give more space at the top for title
plt.subplots_adjust(left=0.07, right=0.95, top=0.85, bottom=0.15, wspace=0.25)  # top=0.85 pour plus d'espace

# Graph 1: Period 2005-2023
# Plot series
line1, = ax1.plot(df_full['Year'], df_full['ebit_margin_adjusted_TECH'], 
                  marker='o', linewidth=2, color='black', markersize=5)

line2, = ax1.plot(df_full['Year'], df_full['z5_industrial'], 
                  marker='s', linewidth=2, color='gray', markersize=5, linestyle='--')

# Horizontal lines for means
mean_line1 = ax1.axhline(y=stats_full['TECH_mean'], color='black', 
                        linestyle=':', linewidth=1, alpha=0.7)

mean_line2 = ax1.axhline(y=stats_full['IND_mean'], color='gray', 
                        linestyle=':', linewidth=1, alpha=0.7)

# Standard deviation bands (shaded areas)
band1 = ax1.fill_between(df_full['Year'], 
                        stats_full['TECH_mean'] - stats_full['TECH_std'],
                        stats_full['TECH_mean'] + stats_full['TECH_std'],
                        alpha=0.1, color='black')

band2 = ax1.fill_between(df_full['Year'], 
                        stats_full['IND_mean'] - stats_full['IND_std'],
                        stats_full['IND_mean'] + stats_full['IND_std'],
                        alpha=0.1, color='gray')

# Configure graph 1
ax1.set_xlabel('Year', fontsize=10, fontweight='bold')
ax1.set_ylabel('EBIT Margin', fontsize=10, fontweight='bold')
ax1.grid(True, alpha=0.2, linestyle='--')
ax1.set_xticks(df_full['Year'])
ax1.set_xticklabels(df_full['Year'], rotation=45, fontsize=9)

# Add centered legend at the top for graph 1 - LOWERED POSITION
legend1 = ax1.legend([line1, line2, mean_line1, mean_line2, band1, band2],
                     ['TECH (Adj. -0.1056)', 'Industrial',
                      f'TECH Mean', 
                      f'IND Mean',
                      '±1 TECH σ', '±1 IND σ'],
                     loc='upper center',
                     bbox_to_anchor=(0.5, 1.08),  # BAISSÉ à 1.08 (était 1.22)
                     ncol=3,
                     fontsize=8,
                     framealpha=0.9,
                     fancybox=True,
                     handlelength=1.5,
                     columnspacing=0.8)

# Add period label
ax1.text(0.05, 0.95, '2005-2023', transform=ax1.transAxes,
         fontsize=11, fontweight='bold', verticalalignment='top',
         bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

# Graph 2: Period 2012-2023
# Plot series
line3, = ax2.plot(df_2012['Year'], df_2012['ebit_margin_adjusted_TECH'], 
                  marker='o', linewidth=2, color='black', markersize=5)

line4, = ax2.plot(df_2012['Year'], df_2012['z5_industrial'], 
                  marker='s', linewidth=2, color='gray', markersize=5, linestyle='--')

# Horizontal lines for means
mean_line3 = ax2.axhline(y=stats_2012['TECH_mean'], color='black', 
                        linestyle=':', linewidth=1, alpha=0.7)

mean_line4 = ax2.axhline(y=stats_2012['IND_mean'], color='gray', 
                        linestyle=':', linewidth=1, alpha=0.7)

# Standard deviation bands (shaded areas)
band3 = ax2.fill_between(df_2012['Year'], 
                        stats_2012['TECH_mean'] - stats_2012['TECH_std'],
                        stats_2012['TECH_mean'] + stats_2012['TECH_std'],
                        alpha=0.1, color='black')

band4 = ax2.fill_between(df_2012['Year'], 
                        stats_2012['IND_mean'] - stats_2012['IND_std'],
                        stats_2012['IND_mean'] + stats_2012['IND_std'],
                        alpha=0.1, color='gray')

# Configure graph 2
ax2.set_xlabel('Year', fontsize=10, fontweight='bold')
ax2.set_ylabel('EBIT Margin', fontsize=10, fontweight='bold')
ax2.grid(True, alpha=0.2, linestyle='--')
ax2.set_xticks(df_2012['Year'])
ax2.set_xticklabels(df_2012['Year'], rotation=45, fontsize=9)

# Add centered legend at the top for graph 2 - LOWERED POSITION
legend2 = ax2.legend([line3, line4, mean_line3, mean_line4, band3, band4],
                     ['TECH (Adj. -0.1056)', 'Industrial',
                      f'TECH Mean', 
                      f'IND Mean',
                      '±1 TECH σ', '±1 IND σ'],
                     loc='upper center',
                     bbox_to_anchor=(0.5, 1.08),  # BAISSÉ à 1.08 (était 1.22)
                     ncol=3,
                     fontsize=8,
                     framealpha=0.9,
                     fancybox=True,
                     handlelength=1.5,
                     columnspacing=0.8)

# Add period label
ax2.text(0.05, 0.95, '2012-2023', transform=ax2.transAxes,
         fontsize=11, fontweight='bold', verticalalignment='top',
         bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

# Save graphs as PDF
graphs_pdf_path = '/content/drive/MyDrive/EBIT_margin_graphs_adj_0_1056.pdf'
fig_graphs.savefig(graphs_pdf_path, dpi=300, bbox_inches='tight', format='pdf')
print(f"\nGraphs saved as PDF: {graphs_pdf_path}")

# Display graphs
plt.show()

# ============================================================================
# 3. STATISTICAL ANALYSIS WITH NEW ADJUSTMENT
# ============================================================================
print("\n" + "="*80)
print("STATISTICAL TESTS FOR PERIOD COMPARISON (ADJUSTMENT: 0.1056)")
print("="*80)

# T-test for TECH means between periods
t_tech_periods, p_tech_periods = stats.ttest_ind(df_full['ebit_margin_adjusted_TECH'], 
                                                 df_2012['ebit_margin_adjusted_TECH'],
                                                 equal_var=False)

# T-test for IND means between periods
t_ind_periods, p_ind_periods = stats.ttest_ind(df_full['z5_industrial'], 
                                               df_2012['z5_industrial'],
                                               equal_var=False)

print(f"\nTECH Comparison (2005-2023 vs 2012-2023):")
print(f"  t = {t_tech_periods:.4f}, p = {p_tech_periods:.4f}")
print(f"  Significant difference (p < 0.05): {'YES' if p_tech_periods < 0.05 else 'NO'}")

print(f"\nIndustrial Comparison (2005-2023 vs 2012-2023):")
print(f"  t = {t_ind_periods:.4f}, p = {p_ind_periods:.4f}")
print(f"  Significant difference (p < 0.05): {'YES' if p_ind_periods < 0.05 else 'NO'}")

# Correlation analysis
print(f"\nCorrelation Analysis:")
print(f"  Period 2005-2023: r = {stats_full['Correlation']:.4f} (p = {stats_full['p_value']:.4f})")
print(f"  Period 2012-2023: r = {stats_2012['Correlation']:.4f} (p = {stats_2012['p_value']:.4f})")
print(f"  Difference: {abs(stats_full['Correlation'] - stats_2012['Correlation']):.4f}")

# Calculate difference between TECH and Industrial
print("\n" + "="*80)
print("DIFFERENCE ANALYSIS: TECH vs INDUSTRIAL")
print("="*80)

# For full period
full_diff_mean = (df_full['ebit_margin_adjusted_TECH'] - df_full['z5_industrial']).mean()
full_diff_std = (df_full['ebit_margin_adjusted_TECH'] - df_full['z5_industrial']).std()

# For recent period
recent_diff_mean = (df_2012['ebit_margin_adjusted_TECH'] - df_2012['z5_industrial']).mean()
recent_diff_std = (df_2012['ebit_margin_adjusted_TECH'] - df_2012['z5_industrial']).std()

print(f"\nFull Period (2005-2023):")
print(f"  Mean(TECH - IND): {full_diff_mean:.6f} ({full_diff_mean*100 if convert_to_ratio else full_diff_mean:.4f}%)")
print(f"  Std Dev(TECH - IND): {full_diff_std:.6f}")

print(f"\nRecent Period (2012-2023):")
print(f"  Mean(TECH - IND): {recent_diff_mean:.6f} ({recent_diff_mean*100 if convert_to_ratio else recent_diff_mean:.4f}%)")
print(f"  Std Dev(TECH - IND): {recent_diff_std:.6f}")

# Paired t-test for difference
paired_t_full, paired_p_full = stats.ttest_rel(df_full['ebit_margin_adjusted_TECH'], df_full['z5_industrial'])
paired_t_recent, paired_p_recent = stats.ttest_rel(df_2012['ebit_margin_adjusted_TECH'], df_2012['z5_industrial'])

print(f"\nPaired t-test (TECH vs IND):")
print(f"  Full Period: t = {paired_t_full:.4f}, p = {paired_p_full:.4f}")
print(f"  Recent Period: t = {paired_t_recent:.4f}, p = {paired_p_recent:.4f}")

# Conclusion
print("\n" + "="*80)
print("CONCLUSION")
print("="*80)

if p_tech_periods > 0.05 and p_ind_periods > 0.05:
    print("The two periods (2005-2023 and 2012-2023) are STATISTICALLY SIMILAR.")
    print("No significant difference was detected in the means.")
else:
    print("The periods show STATISTICALLY SIGNIFICANT DIFFERENCES.")
    if p_tech_periods < 0.05:
        print(f"- TECH EBIT margin changed significantly (p = {p_tech_periods:.4f})")
    if p_ind_periods < 0.05:
        print(f"- Industrial EBIT margin changed significantly (p = {p_ind_periods:.4f})")

print(f"\nWith adjustment of 0.1056{'%' if convert_to_ratio else ''}:")
if full_diff_mean > 0:
    print(f"Full period: TECH is on average {abs(full_diff_mean):.6f} higher than Industrial")
else:
    print(f"Full period: TECH is on average {abs(full_diff_mean):.6f} lower than Industrial")

if recent_diff_mean > 0:
    print(f"Recent period: TECH is on average {abs(recent_diff_mean):.6f} higher than Industrial")
else:
    print(f"Recent period: TECH is on average {abs(recent_diff_mean):.6f} lower than Industrial")

# Print file locations
print("\n" + "="*80)
print("FILES CREATED")
print("="*80)
print(f"1. Statistics table (TXT): {stats_txt_path}")
print(f"2. Graphs with lowered legends (PDF): {graphs_pdf_path}")

# Create PNG version of graphs as well
graphs_png_path = '/content/drive/MyDrive/EBIT_margin_graphs_adj_0_1056.png'
fig_graphs.savefig(graphs_png_path, dpi=300, bbox_inches='tight')
print(f"3. Graphs with lowered legends (PNG): {graphs_png_path}")

print("\nAll files are saved in your Google Drive.")
print(f"Adjustment used: 0.1056{'%' if convert_to_ratio else ''}")